#Load libraries#
library(ggplot2)
library(dplyr)
library(scales)
library(car)

#Rename data files#
# Short aliases for the input data frames used by the plots below.
# NOTE(review): the objects on the right-hand side are not created in this
# file; presumably they are loaded into the workspace beforehand -- confirm.
n <- article_number
d <- data_collection
method <- data_analysis_method
analysis <- data_analysis_quality
sharing <- data_sharing

# Bar plot: number of articles per year, stacked by article type, with one
# panel per journal. Uses columns year, number, type and journal of `n`.
barplot <- ggplot(n, aes(x = year, y = number, fill = type)) +
  geom_bar(stat = "identity", colour = "black") +
  # Journal names are drawn as strips underneath each panel.
  facet_wrap(~factor(journal), strip.position = "bottom", scales = "free_x") +
  scale_x_continuous(
    breaks = c(2012:2016),
    labels = factor(2012:2016),
    limits = c(2011.5, 2016.5)
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    panel.spacing = unit(0, "lines"),
    strip.background = element_blank(),
    strip.placement = "outside",
    strip.text = element_text(colour = "black", size = 11),
    aspect.ratio = 1,
    legend.text = element_text(size = 11),
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_line(colour = "#d3d3d3"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(
      hjust = 0.5, size = 12, family = "Calibri", face = "bold"
    ),
    axis.text.x = element_text(colour = "black", size = 11),
    axis.text.y = element_text(colour = "black", size = 11)
  ) +
  labs(fill = "Article Type") +
  scale_fill_manual(values = c("#1f78b4", "#a6cee3")) +
  # Print the count inside every non-empty segment, centred vertically.
  geom_text(
    data = subset(n, number > 0),
    aes(label = number),
    position = position_stack(vjust = 0.5)
  ) +
  xlab("Journal Type") +
  ylab("Number of Articles")
barplot
# File name first, plot passed by name: avoids relying on partial matching
# of `file` to ggsave()'s `filename` argument.
ggsave("barplot.pdf", plot = barplot)


# Stacked plot: share of dataset-size bands within each journal language.

# Collapse the two middle size bands into a single "10,000 - 100,000" band.
# car::recode is named explicitly because dplyr also exports a recode() with
# a different syntax; which one wins otherwise depends on library() order.
d$size_range2 <- car::recode(
  d$size_range,
  "c('50,000 - 100,000','10,000 - 50,000')='10,000 - 100,000'"
)
# Fix the stacking/legend order of the size bands.
d$size_range2 <- factor(
  d$size_range2,
  levels = c("Over 100,000", "10,000 - 100,000", "1,000 - 10,000", "0 - 1,000")
)

# Human-readable journal language; rows with any other `lang` stay "n/a".
d$journal <- "n/a"
d$journal[d$lang == "Chi"] <- "Chinese"
d$journal[d$lang == "Eng"] <- "English"

# Counts per journal x size band, with freq = share WITHIN each journal.
# The explicit group_by() is required: count() returns an ungrouped result,
# so without it freq would be the share of all papers and the percentage
# labels would neither be correct nor line up with the filled bars.
size_data <- d %>%
  count(journal, size_range2) %>%
  group_by(journal) %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()

# NOTE(review): the N values in the x-axis labels are hard-coded; confirm
# they match the data.
stackedplot <- ggplot(
  size_data,
  aes(x = factor(journal), y = freq, fill = size_range2)
) +
  geom_bar(stat = "identity", position = "fill", colour = "black") +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    aspect.ratio = 1,
    legend.text = element_text(size = 11),
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(
      hjust = 0.5, size = 12, family = "Calibri", face = "bold"
    ),
    axis.text.x = element_text(colour = "black", size = 11),
    axis.text.y = element_text(colour = "black", size = 11)
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(fill = "Datasets Size") +
  scale_fill_manual(values = c("#2171b5", "#6baed6", "#bdd7e7", "#eff3ff")) +
  # Label segments of at least 5%; position_fill matches the bars' position.
  geom_text(
    aes(label = ifelse(freq >= 0.05, paste0(sprintf("%.0f", freq * 100), "%"), "")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_x_discrete(
    name = "Journal Type",
    breaks = c("Chinese", "English"),
    labels = c("Chinese Papers\n(N=17)", "English Papers\n(N=25)")
  )
stackedplot
# File name first, plot by name (no partial matching of `file`).
ggsave("stackedplot1.pdf", plot = stackedplot)

# Stacked plot: share of data collection methods within each journal language.

# Fix the stacking/legend order. The column is addressed as d$method so it
# cannot be confused with the global data frame that is also named `method`.
d$method <- factor(
  d$method,
  levels = c("Web scraping", "Manual", "Data-sharing agreement", "Unspecified")
)

# freq = share WITHIN each journal; count() alone returns ungrouped rows, so
# group_by() is needed for the percentages to sum to 100% per bar.
method_data <- d %>%
  count(journal, method) %>%
  group_by(journal) %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()

# NOTE(review): the N values in the x-axis labels are hard-coded; confirm
# they match the data.
stackedplot <- ggplot(
  method_data,
  aes(x = factor(journal), y = freq, fill = method)
) +
  geom_bar(stat = "identity", position = "fill", colour = "black") +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    aspect.ratio = 1,
    legend.text = element_text(size = 11),
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(
      hjust = 0.5, size = 12, family = "Calibri", face = "bold"
    ),
    axis.text.x = element_text(colour = "black", size = 11),
    axis.text.y = element_text(colour = "black", size = 11)
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(fill = "Data Access Method") +
  scale_fill_manual(values = c("#a6cee3", "#1f78b4", "#b2df8a", "#33a02c")) +
  # Label segments of at least 7%; position_fill matches the bars' position.
  geom_text(
    aes(label = ifelse(freq >= 0.07, paste0(sprintf("%.0f", freq * 100), "%"), "")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_x_discrete(
    name = "Journal Type",
    breaks = c("Chinese", "English"),
    labels = c("Chinese Papers\n(N=17)", "English Papers\n(N=25)")
  )
stackedplot
# File name first, plot by name (no partial matching of `file`).
ggsave("stackedplot.pdf", plot = stackedplot)

# Stacked plot: share of data types within each journal language.

# freq = share WITHIN each journal; count() alone returns ungrouped rows, so
# group_by() is needed for the percentages to sum to 100% per bar.
type_data <- d %>%
  count(journal, type) %>%
  group_by(journal) %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()

# NOTE(review): the N values in the x-axis labels are hard-coded; confirm
# they match the data.
stackedplot <- ggplot(
  type_data,
  aes(x = factor(journal), y = freq, fill = type)
) +
  geom_bar(stat = "identity", position = "fill", colour = "black") +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    aspect.ratio = 1,
    legend.text = element_text(size = 11),
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(
      hjust = 0.5, size = 12, family = "Calibri", face = "bold"
    ),
    text = element_text(family = "Calibri"),
    axis.text.x = element_text(colour = "black", size = 11),
    axis.text.y = element_text(colour = "black", size = 11)
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(fill = "Data Type") +
  scale_fill_manual(values = c("#a6cee3", "#1f78b4")) +
  # Label segments of at least 7%; position_fill matches the bars' position.
  geom_text(
    aes(label = ifelse(freq >= 0.07, paste0(sprintf("%.0f", freq * 100), "%"), "")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_x_discrete(
    name = "Journal Type",
    breaks = c("Chinese", "English"),
    labels = c("Chinese Papers\n(N=17)", "English Papers\n(N=25)")
  )
# NOTE(review): unlike the other figures, this one is only displayed and
# never written to disk -- confirm whether a ggsave() call is missing.
stackedplot

# Horizontal bar plot: number of papers per analysis method, stacked by
# method type, with one panel per journal.

# Fix the order of the methods. The column is addressed as method$method to
# make clear it is the column inside the `method` data frame.
method$method <- factor(
  method$method,
  levels = c(
    "Regression", "Survival Analysis", "Experiment",
    "Computer-aided text analysis", "Decision Trees"
  )
)

barplot <- ggplot(method, aes(x = method, y = number, fill = type)) +
  geom_bar(stat = "identity", colour = "black") +
  coord_flip() +
  facet_wrap(~factor(journal), strip.position = "bottom", scales = "free_x") +
  theme_bw() +
  theme(
    strip.background = element_blank(),
    strip.placement = "outside",
    strip.text = element_text(colour = "black", size = 11),
    aspect.ratio = 1,
    legend.text = element_text(size = 11),
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_line(colour = "#d3d3d3"),
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(
      hjust = 0.5, size = 12, family = "Calibri", face = "bold"
    ),
    axis.text.x = element_text(colour = "black", size = 11),
    axis.text.y = element_text(colour = "black", size = 11)
  ) +
  labs(fill = "Method Type") +
  scale_fill_manual(values = c("#1f78b4", "#a6cee3")) +
  # Print the count inside every non-empty segment.
  geom_text(
    data = subset(method, number > 0),
    aes(label = number),
    position = position_stack(vjust = 0.5)
  ) +
  # The axes are flipped, so the y title ends up under the panels.
  ylab("Journal Type") +
  xlab("")
barplot
# File name first, plot passed by name: avoids relying on partial matching
# of `file` to ggsave()'s `filename` argument.
ggsave("barplot2.pdf", plot = barplot)

# Stacked plot: outlier-handling categories within each journal language.

# freq = share WITHIN each journal; count() alone returns ungrouped rows, so
# group_by() is needed for the percentages to sum to 100% per bar.
outlier_data <- analysis %>%
  count(journal, outlier) %>%
  group_by(journal) %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()

# NOTE(review): the N values in the x-axis labels are hard-coded; confirm
# they match the data.
stackedplot <- ggplot(
  outlier_data,
  aes(x = factor(journal), y = freq, fill = outlier)
) +
  geom_bar(stat = "identity", position = "fill", colour = "black") +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    aspect.ratio = 1,
    legend.text = element_text(size = 11),
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(
      hjust = 0.5, size = 12, family = "Calibri", face = "bold"
    ),
    axis.text.x = element_text(colour = "black", size = 11),
    axis.text.y = element_text(colour = "black", size = 11)
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(fill = "Category") +
  scale_fill_manual(values = c("#3182bd", "#9ecae1", "#deebf7", "#969696")) +
  # Label segments of at least 5%; position_fill matches the bars' position.
  geom_text(
    aes(label = ifelse(freq >= 0.05, paste0(sprintf("%.0f", freq * 100), "%"), "")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_x_discrete(
    name = "Journal Type",
    breaks = c("Chinese", "English"),
    labels = c("Chinese Papers\n(N=17)", "English Papers\n(N=25)")
  )
stackedplot
# File name first, plot by name (no partial matching of `file`).
ggsave("Figure 4_outlier.pdf", plot = stackedplot)


# Stacked plot: effect-size reporting within each journal language.

# Fix the stacking/legend order of the categories.
analysis$effect_size <- factor(
  analysis$effect_size,
  levels = c("Yes", "No", "Not applicable")
)

# freq = share WITHIN each journal; count() alone returns ungrouped rows, so
# group_by() is needed for the percentages to sum to 100% per bar.
effect_data <- analysis %>%
  count(journal, effect_size) %>%
  group_by(journal) %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()

# NOTE(review): the N values in the x-axis labels are hard-coded; confirm
# they match the data.
stackedplot <- ggplot(
  effect_data,
  aes(x = factor(journal), y = freq, fill = effect_size)
) +
  geom_bar(stat = "identity", position = "fill", colour = "black") +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    aspect.ratio = 1,
    legend.text = element_text(size = 11),
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(
      hjust = 0.5, size = 12, family = "Calibri", face = "bold"
    ),
    axis.text.x = element_text(colour = "black", size = 11),
    axis.text.y = element_text(colour = "black", size = 11)
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(fill = "Category") +
  scale_fill_manual(values = c("#3182bd", "#deebf7", "#969696")) +
  # Label segments of at least 5%; position_fill matches the bars' position.
  geom_text(
    aes(label = ifelse(freq >= 0.05, paste0(sprintf("%.0f", freq * 100), "%"), "")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_x_discrete(
    name = "Journal Type",
    breaks = c("Chinese", "English"),
    labels = c("Chinese Papers\n(N=17)", "English Papers\n(N=25)")
  )
stackedplot
# File name first, plot by name (no partial matching of `file`).
ggsave("Figure 4_effect.pdf", plot = stackedplot)

# Stacked plot: measurement-quality categories within each journal language.

# Fix the stacking/legend order of the categories.
analysis$measurement <- factor(
  analysis$measurement,
  levels = c("Sound", "Slightly problematic", "Problematic", "Not applicable")
)

# freq = share WITHIN each journal; count() alone returns ungrouped rows, so
# group_by() is needed for the percentages to sum to 100% per bar.
measurement_data <- analysis %>%
  count(journal, measurement) %>%
  group_by(journal) %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()

# NOTE(review): the N values in the x-axis labels are hard-coded; confirm
# they match the data.
stackedplot <- ggplot(
  measurement_data,
  aes(x = factor(journal), y = freq, fill = measurement)
) +
  geom_bar(stat = "identity", position = "fill", colour = "black") +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    aspect.ratio = 1,
    legend.text = element_text(size = 11),
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(
      hjust = 0.5, size = 12, family = "Calibri", face = "bold"
    ),
    axis.text.x = element_text(colour = "black", size = 11),
    axis.text.y = element_text(colour = "black", size = 11)
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(fill = "Category") +
  scale_fill_manual(values = c("#3182bd", "#9ecae1", "#deebf7", "#969696")) +
  # Label segments of at least 5%; position_fill matches the bars' position.
  geom_text(
    aes(label = ifelse(freq >= 0.05, paste0(sprintf("%.0f", freq * 100), "%"), "")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_x_discrete(
    name = "Journal Type",
    breaks = c("Chinese", "English"),
    labels = c("Chinese Papers\n(N=17)", "English Papers\n(N=25)")
  )
stackedplot
# File name first, plot by name (no partial matching of `file`).
ggsave("Figure 4_measurement.pdf", plot = stackedplot)

# Stacked plot: robustness-check categories within each journal language.

# freq = share WITHIN each journal; count() alone returns ungrouped rows, so
# group_by() is needed for the percentages to sum to 100% per bar.
robustness_data <- analysis %>%
  count(journal, robustness) %>%
  group_by(journal) %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()

# NOTE(review): the N values in the x-axis labels are hard-coded; confirm
# they match the data.
stackedplot <- ggplot(
  robustness_data,
  aes(x = factor(journal), y = freq, fill = robustness)
) +
  geom_bar(stat = "identity", position = "fill", colour = "black") +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    aspect.ratio = 1,
    legend.text = element_text(size = 11),
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(
      hjust = 0.5, size = 12, family = "Calibri", face = "bold"
    ),
    axis.text.x = element_text(colour = "black", size = 11),
    axis.text.y = element_text(colour = "black", size = 11)
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(fill = "Category") +
  scale_fill_manual(values = c("#3182bd", "#9ecae1", "#deebf7")) +
  # Label segments of at least 5%; position_fill matches the bars' position.
  geom_text(
    aes(label = ifelse(freq >= 0.05, paste0(sprintf("%.0f", freq * 100), "%"), "")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_x_discrete(
    name = "Journal Type",
    breaks = c("Chinese", "English"),
    labels = c("Chinese Papers\n(N=17)", "English Papers\n(N=25)")
  )
stackedplot
# File name first, plot by name (no partial matching of `file`).
ggsave("Figure 4_robustness.pdf", plot = stackedplot)


# Stacked plot: assumption-check categories within each journal language.

# freq = share WITHIN each journal; count() alone returns ungrouped rows, so
# group_by() is needed for the percentages to sum to 100% per bar.
assumption_data <- analysis %>%
  count(journal, assumption) %>%
  group_by(journal) %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()

# NOTE(review): the N values in the x-axis labels are hard-coded; confirm
# they match the data.
stackedplot <- ggplot(
  assumption_data,
  aes(x = factor(journal), y = freq, fill = assumption)
) +
  geom_bar(stat = "identity", position = "fill", colour = "black") +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    aspect.ratio = 1,
    legend.text = element_text(size = 11),
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(
      hjust = 0.5, size = 12, family = "Calibri", face = "bold"
    ),
    axis.text.x = element_text(colour = "black", size = 11),
    axis.text.y = element_text(colour = "black", size = 11)
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(fill = "Category") +
  scale_fill_manual(values = c("#3182bd", "#9ecae1", "#deebf7")) +
  # Label segments of at least 5%; position_fill matches the bars' position.
  geom_text(
    aes(label = ifelse(freq >= 0.05, paste0(sprintf("%.0f", freq * 100), "%"), "")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_x_discrete(
    name = "Journal Type",
    breaks = c("Chinese", "English"),
    labels = c("Chinese Papers\n(N=17)", "English Papers\n(N=25)")
  )
stackedplot
# File name first, plot by name (no partial matching of `file`).
ggsave("Figure 4_assumption.pdf", plot = stackedplot)

# Stacked plot: generalizability categories within each journal language.
# (The data column is spelled `generalizablity`; that spelling is kept
# because it is the column name the code reads.)

# The counts are stored under the name the plot actually reads; previously
# they were assigned to `supplement_data`, leaving `generalizability_data`
# undefined when ggplot() was called.
# freq = share WITHIN each journal; count() alone returns ungrouped rows, so
# group_by() is needed for the percentages to sum to 100% per bar.
generalizability_data <- analysis %>%
  count(journal, generalizablity) %>%
  group_by(journal) %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()

# NOTE(review): the N values in the x-axis labels are hard-coded; confirm
# they match the data.
stackedplot <- ggplot(
  generalizability_data,
  aes(x = factor(journal), y = freq, fill = generalizablity)
) +
  geom_bar(stat = "identity", position = "fill", colour = "black") +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    aspect.ratio = 1,
    legend.text = element_text(size = 11),
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(
      hjust = 0.5, size = 12, family = "Calibri", face = "bold"
    ),
    axis.text.x = element_text(colour = "black", size = 11),
    axis.text.y = element_text(colour = "black", size = 11)
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(fill = "Category") +
  scale_fill_manual(values = c("#3182bd", "#9ecae1", "#deebf7")) +
  # Label segments of at least 5%; position_fill matches the bars' position.
  geom_text(
    aes(label = ifelse(freq >= 0.05, paste0(sprintf("%.0f", freq * 100), "%"), "")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_x_discrete(
    name = "Journal Type",
    breaks = c("Chinese", "English"),
    labels = c("Chinese Papers\n(N=17)", "English Papers\n(N=25)")
  )
stackedplot
# File name first, plot by name (no partial matching of `file`).
ggsave("Figure 4_generalizability.pdf", plot = stackedplot)

# Stacked plot: supplement availability within each journal language.

# Fix the stacking/legend order of the categories.
sharing$supplement <- factor(sharing$supplement, levels = c("Yes", "No"))

# freq = share WITHIN each journal; count() alone returns ungrouped rows, so
# group_by() is needed for the percentages to sum to 100% per bar.
supplement_data <- sharing %>%
  count(journal, supplement) %>%
  group_by(journal) %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()

# NOTE(review): the N values in the x-axis labels are hard-coded; confirm
# they match the data.
stackedplot <- ggplot(
  supplement_data,
  aes(x = factor(journal), y = freq, fill = supplement)
) +
  geom_bar(stat = "identity", position = "fill", colour = "black") +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    aspect.ratio = 1,
    legend.text = element_text(size = 11),
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(
      hjust = 0.5, size = 12, family = "Calibri", face = "bold"
    ),
    axis.text.x = element_text(colour = "black", size = 11),
    axis.text.y = element_text(colour = "black", size = 11)
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(fill = "Category") +
  scale_fill_manual(values = c("#3182bd", "#deebf7")) +
  # Label segments of at least 5%; position_fill matches the bars' position.
  geom_text(
    aes(label = ifelse(freq >= 0.05, paste0(sprintf("%.0f", freq * 100), "%"), "")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_x_discrete(
    name = "Journal Type",
    breaks = c("Chinese", "English"),
    labels = c("Chinese Papers\n(N=17)", "English Papers\n(N=25)")
  )
stackedplot
# File name first, plot by name (no partial matching of `file`).
ggsave("Figure 6_supplement.pdf", plot = stackedplot)

# Stacked plot: dataset availability within each journal language.

# Fix the stacking/legend order of the categories.
sharing$dataset <- factor(
  sharing$dataset,
  levels = c("Yes", "Data available upon Request", "No")
)

# freq = share WITHIN each journal; count() alone returns ungrouped rows, so
# group_by() is needed for the percentages to sum to 100% per bar.
data_data <- sharing %>%
  count(journal, dataset) %>%
  group_by(journal) %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()

# NOTE(review): the N values in the x-axis labels are hard-coded; confirm
# they match the data.
stackedplot <- ggplot(
  data_data,
  aes(x = factor(journal), y = freq, fill = dataset)
) +
  geom_bar(stat = "identity", position = "fill", colour = "black") +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    aspect.ratio = 1,
    legend.text = element_text(size = 11),
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(
      hjust = 0.5, size = 12, family = "Calibri", face = "bold"
    ),
    axis.text.x = element_text(colour = "black", size = 11),
    axis.text.y = element_text(colour = "black", size = 11)
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(fill = "Category") +
  scale_fill_manual(values = c("#3182bd", "#9ecae1", "#deebf7")) +
  # Label segments of at least 5%; position_fill matches the bars' position.
  geom_text(
    aes(label = ifelse(freq >= 0.05, paste0(sprintf("%.0f", freq * 100), "%"), "")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_x_discrete(
    name = "Journal Type",
    breaks = c("Chinese", "English"),
    labels = c("Chinese Papers\n(N=17)", "English Papers\n(N=25)")
  )
stackedplot
# File name first, plot by name (no partial matching of `file`).
ggsave("Figure 6_data.pdf", plot = stackedplot)


# Stacked plot: code availability within each journal language.

# Fix the stacking/legend order of the categories.
sharing$code <- factor(
  sharing$code,
  levels = c("Original code", "Pseudo code", "No")
)

# freq = share WITHIN each journal; count() alone returns ungrouped rows, so
# group_by() is needed for the percentages to sum to 100% per bar.
code_data <- sharing %>%
  count(journal, code) %>%
  group_by(journal) %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()

# NOTE(review): the N values in the x-axis labels are hard-coded; confirm
# they match the data.
stackedplot <- ggplot(
  code_data,
  aes(x = factor(journal), y = freq, fill = code)
) +
  geom_bar(stat = "identity", position = "fill", colour = "black") +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    aspect.ratio = 1,
    legend.text = element_text(size = 11),
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(
      hjust = 0.5, size = 12, family = "Calibri", face = "bold"
    ),
    axis.text.x = element_text(colour = "black", size = 11),
    axis.text.y = element_text(colour = "black", size = 11)
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(fill = "Category") +
  scale_fill_manual(values = c("#3182bd", "#9ecae1", "#deebf7")) +
  # Label segments of at least 5%; position_fill matches the bars' position.
  geom_text(
    aes(label = ifelse(freq >= 0.05, paste0(sprintf("%.0f", freq * 100), "%"), "")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_x_discrete(
    name = "Journal Type",
    breaks = c("Chinese", "English"),
    labels = c("Chinese Papers\n(N=17)", "English Papers\n(N=25)")
  )
stackedplot
# File name first, plot by name (no partial matching of `file`).
ggsave("Figure 6_code.pdf", plot = stackedplot)

# Stacked plot: presentation categories within each journal language.

# Fix the stacking/legend order of the categories.
sharing$presentation <- factor(sharing$presentation, levels = c("Yes", "No"))

# freq = share WITHIN each journal; count() alone returns ungrouped rows, so
# group_by() is needed for the percentages to sum to 100% per bar.
presentation_data <- sharing %>%
  count(journal, presentation) %>%
  group_by(journal) %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()

# NOTE(review): the N values in the x-axis labels are hard-coded; confirm
# they match the data.
stackedplot <- ggplot(
  presentation_data,
  aes(x = factor(journal), y = freq, fill = presentation)
) +
  geom_bar(stat = "identity", position = "fill", colour = "black") +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    aspect.ratio = 1,
    legend.text = element_text(size = 11),
    axis.line = element_line(size = 1, colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(
      hjust = 0.5, size = 12, family = "Calibri", face = "bold"
    ),
    axis.text.x = element_text(colour = "black", size = 11),
    axis.text.y = element_text(colour = "black", size = 11)
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(fill = "Category") +
  scale_fill_manual(values = c("#3182bd", "#deebf7")) +
  # Label segments of at least 5%; position_fill matches the bars' position.
  geom_text(
    aes(label = ifelse(freq >= 0.05, paste0(sprintf("%.0f", freq * 100), "%"), "")),
    position = position_fill(vjust = 0.5)
  ) +
  scale_x_discrete(
    name = "Journal Type",
    breaks = c("Chinese", "English"),
    labels = c("Chinese Papers\n(N=17)", "English Papers\n(N=25)")
  )
stackedplot
# File name first, plot by name (no partial matching of `file`).
ggsave("Figure 6_presentation.pdf", plot = stackedplot)